################################################
## Supplemental Tables/Figures #################
################################################

if (!require(pacman)) install.packages("pacman")

pacman::p_load(
  tidyverse, # tidyverse
  panelr, # panel data analysis
  here, # computational reproducibility
  glue, # gluing objects and strings
  tidylog, # logging analysis
  naniar, # missing data
  readxl,
  ggpubr,
  broom,
  patchwork,
  plm,
  broom.mixed,
  estimatr,
  DeclareDesign,
  sensemakr,
  mice,
  testthat,
  modelsummary,
  flextable
)

source(here("functions/utils.r"))
source(here("functions/theme.R"))

ggplot2::theme_set(theme_bw())

# no scientific notation
options(scipen = 999)

#read in data
df <- read_csv(here("panel_data_jul24_2.csv"))

## Table C1
table_df <- df %>%
  dplyr::summarize(female = 1 - mean(male),
                   for_born = 1 - mean(usborn),
                   edu_level = mean(edu),
                   income_level = mean(income),
                   chinese_pct = mean(if_else(str_detect(as.character(natorigin), "1"), 1, 0), na.rm = T),
                   japanese_pct = mean(if_else(str_detect(as.character(natorigin), "6"), 1, 0), na.rm = T),
                   filipino_pct = mean(if_else(str_detect(as.character(natorigin), "4"), 1, 0), na.rm = T),
                   korean_pct = mean(if_else(str_detect(as.character(natorigin), "3"), 1, 0), na.rm = T),
                   vietnamese_pct = mean(if_else(str_detect(as.character(natorigin), "5"), 1, 0), na.rm = T),
                   indian_pct = mean(if_else(str_detect(as.character(natorigin), "2"), 1, 0), na.rm = T)) %>% 
  pivot_longer(cols = everything(), 
               names_to = "Variables",
               values_to = "Percentage") 

table_df[-c(3:4),]$Percentage <- round(table_df[-c(3:4),]$Percentage, 2)*100

table_df$Percentage[table_df$Variables == "edu_level"] <- "4 year degree"
table_df$Percentage[table_df$Variables == "income_level"] <- "$70K - $79,999"

table_df[-c(3:4),]$Percentage <- paste0(table_df[-c(3:4),]$Percentage, "%")

names(table_df)[2] <- "Percentage/Average"

table_df$Variables <- c("% Female", "% Foreign born", "Education level", "Income level", "% Chinese", "% Japanese", "% Filipino", "% Korean", "% Vietnamese", "% Indian")

sum_table <- regulartable(table_df) %>%
  theme_booktabs() %>%
  autofit()

sum_table

save_as_docx(sum_table, path = here("outputs", "table_c1.docx"))

## Table C2
summary(df$age)
summary(df$gender)
summary(df$usborn)
table(df$edu)
table(df$income)
table(df$chinese, df$wave)
table(df$japanese, df$wave)
table(df$filipino, df$wave)
table(df$korean, df$wave)
table(df$vietnamese, df$wave)
table(df$indian, df$wave)
## Table C3 Covidwave## Table C3 Covid Hardship Experiences 

table(df$lostjob, df$wave)
# 37/664
# 0.0557

table(df$incomereduc, df$wave)
#125/664
#0.1883

table(df$rona.behav.sneeze, df$wave)
#242/664
#0.3645

table(df$rona.behav.language, df$wave)
#86/664
#0.1295

table(df$rona.behav.walk, df$wave)
#79/664
#0.1190

table(df$rona.behav.transit, df$wave)
#143/664
#0.2154

table(df$rona.behav.other, df$wave)
#14/664
#0.0211

table(df$ronaunfair, df$wave)
#53/664
#0.080

table(df$ronaunfair, df$wave)
#53/(501+53)

table(df$rona.unfair, df$wave)
#64/(556+64)

table(df$knowrona, df$wave)
#90/(465+90)

## Table C4
df2 <- subset(df, wave == 2)

df2 <- df2 %>%
  mutate(apa.discrim.rona = replace_na(apa.discrim.rona, 0),
         lostjob = replace_na(lostjob, 0),
         incomereduc = replace_na(incomereduc, 0),
         ronaunfairasian = replace_na(ronaunfairasian, 0),
         rona.apa.mistreat = replace_na(rona.apa.mistreat, 0))

mod1 <- lm(apa.discrim.rona ~ lostjob + incomereduc, data = df2) 
mod2 <- lm(apa.discrim.rona ~ ronaunfairasian, data = df2)
mod3 <- lm(apa.discrim.rona ~ rona.apa.mistreat, data = df2)

set.seed(1234)

modelsummary(list(`Hardship` = mod1, `Experience` = mod2, `Witnesing` = mod3),
             fmt = 4,
             coef_omit = "Intercept",
             statistic = c("p = {p.value}"),
             #stars = TRUE,
             vcov = "robust",
             output = here("outputs", "table_c4.docx")) 


#############################################################################
##### Please Note the Following Supplemental Tables Use Imputed Data    #####
##### Values May Appear Different From Text Due To Nature of Imputation #####                         
#############################################################################

## Imputation

df_na_sum <- df %>%
  filter(wave != 1) %>%
  select(`X2020likelyvote`, biden, gendiscrim, apa.discrim.rona, usborn, edu, income, DEM, GOP, age, male, wave, korean, chinese) %>%
  map_dfr(~ is.na(.) %>% mean())

imp <- mice(
  df %>%
    filter(wave != 1) %>%
    select(gendiscrim, apa.discrim.rona, usborn, edu, income, DEM, GOP, age, male, wave, korean, chinese),
  seed = 1234, # for reproducibility
  m = 5, # the number of imputations
  maxit = 1000, # the max number of iterations
  method = "pmm", # predictive mean method
  print = FALSE
)

densityplot(imp, layout = c(1, 2))

imputed <- mice::complete(imp)

# Replace with the imputed values

index <- 665:nrow(df)

df$apa.discrim.rona[index] <- imputed$apa.discrim.rona
df$gendiscrim[index] <- imputed$gendiscrim
df$DEM[index] <- imputed$DEM
df$GOP[index] <- imputed$GOP
df$age[index] <- imputed$age

df %>%
  filter(wave != 1) %>%
  select(gendiscrim, apa.discrim.rona, usborn, edu, DEM, GOP, age, male, natorigin, biden, `2020likelyvote`) %>%
  vis_miss()


## Likely to vote 
# rename the DV
df$likely_vote <- df$`X2020likelyvote`


# create a proxy variable
df$proxy <- rep(subset(df, wave == 1)$gendiscrim, 3)

## check the proxy data distribution
df$proxy %>% table()

df$prior[df$proxy == 0.5] <- "Middle"
df$prior[df$proxy < 0.5] <- "Low"
df$prior[df$proxy > 0.5] <- "High"

min(df$proxy)
max(df$proxy)


# create factor variables

## covariates
df$usborn <- factor(df$usborn)
df$male <- factor(df$male)
df$chinese <- factor(df$chinese)
df$indian <- factor(df$indian)
df$GOP <- factor(df$GOP)
df$DEM <- factor(df$DEM)
df$wave <- factor(df$wave)
df$nat_origin <- factor(df$natorigin)

model.outs <- cal_model_outputs(df)
model.outs.low <- cal_model_outputs(df %>% filter(prior == "Low"))
model.outs.middle <- cal_model_outputs(df %>% filter(prior == "Middle"))
model.outs.high <- cal_model_outputs(df %>% filter(prior == "High"))


### Table C5 

sense.out <- sensemakr(model = lm(likely_vote ~ gendiscrim + apa.discrim.rona + gendiscrim + usborn + edu + DEM + GOP + age + male + wave + nat_origin, data = df),
                       treatment = "apa.discrim.rona", 
                       benchmark_covariates = "usborn1",
                       kd = 1:3, 
                       ky = 1:3, 
                       q = 1)

summary(sense.out) %>%
  flextable() %>%
  set_table_properties(layout = "autofit", width = .8) %>%
  save_as_docx(path = here("outputs", "table_c5.docx"))



### Sensitivity analysis

### Table C6
sense.out.biden <- sensemakr(model = lm(biden ~ gendiscrim + apa.discrim.rona + gendiscrim + usborn + edu + DEM + GOP + age + male + wave + nat_origin, data = df),
                             treatment = "apa.discrim.rona", 
                             benchmark_covariates = "DEM1",
                             kd = 1:3, 
                             ky = 1:3, 
                             q = 1)

summary(sense.out.biden) %>%
  flextable() %>%
  set_table_properties(layout = "autofit", width = .8) %>%
  save_as_docx(path = here("outputs", "table_c6.docx"))

### Table C7

cm <- c("apa.discrim.rona" = "COVID Discrim",
        "gendiscrim" = "General Discrim (Post COVID",
        "usborn1" = "US Born",
        "edu" = "Education",
        "DEM1" = "Democratic Party",
        "GOP1" = "Republican Party",
        "age" = "Age",
        "male1" = "Male",
        "wave3" = "Wave3",
        "proxy" = "Wave 1 General Discrim",
        "apa.discrim.rona:proxy" = "COVID Discrim* Pre-COVID General Discrim",
        "chinese1" = "Chinese",
        "indian1" = "Indian")

turnout_mod <- list(
  
  "OLS" = lm(likely_vote ~ gendiscrim + apa.discrim.rona + gendiscrim + usborn + edu + DEM + GOP + age + male + wave + nat_origin, data = df),
  
  "Weighted" = lm(likely_vote ~ gendiscrim + apa.discrim.rona + gendiscrim + usborn + edu + DEM + GOP + age + male + wave + nat_origin, data = df, weight = weights),
  
  "Within" = plm(likely_vote ~ gendiscrim + apa.discrim.rona + DEM + GOP + age + nat_origin, data = df, index = c("pid", "wave"), model = "within"),
  
  "Interaction term" = lm(likely_vote ~ apa.discrim.rona + gendiscrim + usborn + edu + DEM + GOP + age + male + wave + proxy + apa.discrim.rona:proxy + nat_origin, data = df)
  
)

modelsummary(turnout_mod,
             fmt = 4,
             coef_map = cm,
             coef_omit = "Intercept|nat_origin",
             statistic = c("p = {p.value}"),
             output = here("outputs", "table_c7.docx"))

### Table C8

biden_mod <- list(
  
  "Logit" = glm(biden ~ gendiscrim + apa.discrim.rona + gendiscrim + usborn + edu + DEM + GOP + age + male + wave + nat_origin, data = df, family = binomial),
  
  "Weighted" = glm(biden ~ gendiscrim + apa.discrim.rona + gendiscrim + usborn + edu + DEM + GOP + age + male + wave + nat_origin, data = df, family = binomial, weight = weights),
  
  "Within" = plm(biden ~ gendiscrim + apa.discrim.rona + DEM + GOP + age + nat_origin + wave, data = df, index = c("pid", "wave"), model = "within", family = binomial),    
  
  "Interaction" = glm(biden ~  apa.discrim.rona + gendiscrim + usborn + edu + DEM + GOP + age + male + wave + proxy + apa.discrim.rona:proxy + nat_origin, data = df, family = binomial)
  
)

modelsummary(biden_mod,
             fmt = 4,
             coef_map = cm,
             coef_omit = "Intercept|nat_origin",
             statistic = c("p = {p.value}"),
             output = here("outputs", "table_c8.docx"))



## Table C9
sub.models <- list(
  
  "High Discrimination Pre-COVID" = glm(biden ~ gendiscrim + apa.discrim.rona + usborn + edu + DEM + GOP + age + male + wave + chinese + indian, data = subset(df, prior == "High"), family = binomial, weight = weights),
  
  "Middle Discrimination Pre-COVID" = glm(biden ~ gendiscrim + apa.discrim.rona + usborn + edu + DEM + GOP + age + male + wave + chinese + indian, data = subset(df, prior == "Middle"), family = binomial, weight = weights),
  
  "Low Discrimination Pre-COVID" = glm(biden ~ gendiscrim + apa.discrim.rona + usborn + edu + DEM + GOP + age + male + wave + chinese + indian, data = subset(df, prior == "Low"), family = binomial, weight = weights)
)

modelsummary(sub.models,
             exponentiate = TRUE,
             fmt = 4,
             coef_map = cm,
             coef_omit = "Intercept",
             statistic = c("p = {p.value}"),
             #stars = TRUE,
             output = here("outputs", "table_c9.docx"),
             vcov = "robust")
